import aspose.words as aw
import re
# Load the Word document from disk and flatten it to a single text string.
doc = aw.Document("test.doc")
text = doc.get_text()

# link_pattern matches Baidu Pan share URLs of the form
#   https://pan.baidu.com/share/init?surl=<id>
# code_pattern captures the token that follows a "提取码" label, allowing an
# optional ASCII or full-width colon and optional whitespace in between.
link_pattern = r'https://pan\.baidu\.com/share/init\?surl=[a-zA-Z0-9_-]+'
code_pattern = r'提取码[:：]?\s*([a-zA-Z0-9_-]+)'

# re.findall returns matches in the order they occur in the text.
links = re.findall(link_pattern, text)
codes = re.findall(code_pattern, text)

# De-duplicate while keeping first-occurrence order. A set must not be used
# here: set iteration order for strings is arbitrary and varies between runs
# (hash randomization), which would pair links with unrelated codes in the
# zip() below. dict.fromkeys keeps insertion order.
links = list(dict.fromkeys(links))

# Diagnostics: number of unique links, then number of code matches
# (the code count is taken before de-duplication).
print(len(links))
print(len(codes))

codes = list(dict.fromkeys(codes))

# Pair the i-th unique link with the i-th unique code, in document order.
# NOTE: zip() stops at the shorter list, so surplus links or codes are
# silently dropped; compare the two counts printed above to spot a mismatch.
for link, code in zip(links, codes):
    print(f"链接: {link}")
    print(f"提取码: {code}")
    print("------")


